{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'snownlp'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-1-5f8dc8e71337>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0msnownlp\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mSnowNLP\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;31m# snownlp需要使用unicode对string进行编码\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[0msentence\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34mu\"我很喜欢《肖申克的救赎》\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[0ms\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mSnowNLP\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msentence\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'snownlp'"
     ]
    }
   ],
   "source": [
    "from snownlp import SnowNLP\n",
    "\n",
    "# snownlp需要使用unicode对string进行编码\n",
    "sentence = u\"我很喜欢《肖申克的救赎》\"\n",
    "s = SnowNLP(sentence)\n",
    "\n",
    "# 分词功能\n",
    "print(\"Segmented words = {}\".format(s.words))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tagging: \n"
     ]
    },
    {
     "ename": "NameError",
     "evalue": "name 's' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-2-13431efe0a47>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# 词性标注功能\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Tagging: \"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mword\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtag\u001b[0m \u001b[1;32min\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtags\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m     \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Word = {}, Tag = {}\\n\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mword\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtag\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 's' is not defined"
     ]
    }
   ],
   "source": [
    "# 词性标注功能\n",
    "print(\"Tagging: \")\n",
    "for word, tag in s.tags:\n",
    "    print(\"Word = {}, Tag = {}\\n\".format(word, tag))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sentiment score = 0.9855234741222814\n"
     ]
    }
   ],
   "source": [
    "# 情感分析: 0 -- Negative, 1 -- Positive\n",
    "print(\"Sentiment score = {}\".format(s.sentiments))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pinyin = ['wo', 'hen', 'xi', 'huan', '《', 'xiao', 'shen', 'ke', 'de', 'jiu', 'shu', '》']\n"
     ]
    }
   ],
   "source": [
    "# SnowNLP同时也提供其他常用的功能，例如拼音，以及简繁转换\n",
    "print(\"Pinyin = {}\".format(s.pinyin))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'bm25', 'doc', 'han', 'idf', 'keywords', 'pinyin', 'sentences', 'sentiments', 'sim', 'summary', 'tags', 'tf', 'words']\n"
     ]
    }
   ],
   "source": [
    "# 显示SnowNLP对象的所有成员变量以及支持的方法\n",
    "print(dir(s))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sentiment score = 0.4740530808511534\n"
     ]
    }
   ],
   "source": [
    "# 看一个中性的例子\n",
    "sentence = u\"我明天早上出发\"\n",
    "s = SnowNLP(sentence)\n",
    "print(\"Sentiment score = {}\".format(s.sentiments))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sentiment score = 0.34511311599034533\n"
     ]
    }
   ],
   "source": [
    "# 看一个负面情绪的例子\n",
    "sentence = u\"这件商品的价格太高了，至少我不会买\"\n",
    "s = SnowNLP(sentence)\n",
    "print(\"Sentiment score = {}\".format(s.sentiments))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# SnowNLP同时支持使用用户提供的样本进行训练、保存模型，使用预训练好的模型进行分类等等"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
